## [1] "/Users/zgold/Documents/GitHub/WCOA21"

General Observations of the Dataset

# Join the sample sheet to the environmental data, keep rows that have a
# recorded sampling method, and coerce the columns used for plotting to
# numeric. The join key is stated explicitly (Sample_ID is the key the
# implicit join reported) so the join cannot silently change if another
# shared column name is ever added to either table.
combined <- sample_data %>%
  left_join(env_data, by = "Sample_ID") %>%
  filter(!is.na(`Sampling method`)) %>%
  mutate(
    `Depth (m)` = as.numeric(`Depth`),
    CTDTEMP_ITS90 = as.numeric(CTDTEMP_ITS90),
    CTDOXY = as.numeric(CTDOXY),
    Longitude = as.numeric(Longitude),
    Latitude = as.numeric(Latitude)
  )

# Four 5-colour palettes; concatenated below to give 20 colours for Line_ID.
pal <- park_palette("ChannelIslands", 5)
pal2 <- park_palette("Yellowstone", 5)
pal3 <- park_palette("Arches", 5)
pal4 <- park_palette("Hawaii", 5)

# Oxygen against temperature for all samples, coloured by line; point size
# reflects the number of overlapping observations (geom_count).
combined %>%
  ggplot(aes(x = CTDTEMP_ITS90, y = CTDOXY, colour = `Line_ID`)) +
  geom_count() +
  scale_color_manual(values = c(pal, pal2, pal3, pal4))
## Warning: Removed 30 rows containing non-finite values (`stat_sum()`).

Plot by Line

# One facet row per line: longitude on x, negative log depth on y, points
# coloured by CTD oxygen and sized by the number of overlapping observations.
ggplot(
  combined,
  aes(x = Longitude, colour = CTDOXY, y = -log(`Depth (m)`))
) +
  geom_count() +
  facet_grid(Line_ID ~ ., scales = "free_x")
## Warning: Removed 30 rows containing non-finite values (`stat_sum()`).

General Statistics

Total Unique Samples:

## [1] "717"

Depth Distribution of Samples:

Vast majority of WCOA samples are taken within the top 100m.

Cruise Map

The biological stations are visible here as they are sampled far more frequently.

Selection of Key Stations

Which of Line 0 to keep

# Line 0: stations 22 and 23 are the ones retained.
line_0_to_keep <- combined %>%
  filter(Line_ID %in% "0", Station_ID %in% c("22", "23")) %>%
  distinct(Station_ID)

# Number of distinct depths shallower than 100 m sampled at each Line 0
# station position. `.groups = "drop"` returns an ungrouped tibble, so no
# grouping leaks into later steps and summarise() prints no grouping message.
summed_station_line_0 <- combined %>%
  filter(Line_ID %in% "0") %>%
  filter(`Depth (m)` < 100) %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 0 stations, sized by number of depths and labelled by
# Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_0,
    aes(x = Longitude, y = Latitude, size = n_depths, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_0,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-126, -123), ylim = c(47, 49), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

Which of Line 2 to keep

# Number of distinct depths sampled at each Line 2 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_2 <- combined %>%
  filter(Line_ID %in% "2") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 2 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_2,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_2,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-127, -124), ylim = c(48, 50), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 2.
line_2_to_keep <- combined %>%
  filter(
    Line_ID %in% "2",
    Station_ID %in% c("16", "14", "12", "10", "8")
  ) %>%
  distinct(Station_ID)

Which of Line 3 to keep

# Number of distinct depths sampled at each Line 3 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_3 <- combined %>%
  filter(Line_ID %in% "3") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 3 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_3,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_3,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-127, -124), ylim = c(46, 50), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Every station on Line 3 is retained (no Station_ID filter is applied).
line_3_to_keep <- combined %>%
  filter(Line_ID %in% "3") %>%
  distinct(Station_ID)

Which of Line 4 to keep

# Number of distinct depths sampled at each Line 4 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_4 <- combined %>%
  filter(Line_ID %in% "4") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 4 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_4,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_4,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-127, -124), ylim = c(46, 48), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 4.
line_4_to_keep <- combined %>%
  filter(
    Line_ID %in% "4",
    Station_ID %in% c("37", "40", "41", "42", "43")
  ) %>%
  distinct(Station_ID)

Which of Line 5 to keep

# Number of distinct depths sampled at each Line 5 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_5 <- combined %>%
  filter(Line_ID %in% "5") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 5 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_5,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_5,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-126, -123), ylim = c(45, 47), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 5.
line_5_to_keep <- combined %>%
  filter(
    Line_ID %in% "5",
    Station_ID %in% c("30", "32", "33", "34", "26")
  ) %>%
  distinct(Station_ID)

Which of Line 6 to keep

# Number of distinct depths sampled at each Line 6 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_6 <- combined %>%
  filter(Line_ID %in% "6") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 6 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_6,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_6,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-125.5, -123.5), ylim = c(44, 45), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 6.
line_6_to_keep <- combined %>%
  filter(
    Line_ID %in% "6",
    Station_ID %in% c("47", "48", "49", "50", "52")
  ) %>%
  distinct(Station_ID)

Which of Line 7 to keep

# Number of distinct depths sampled at each Line 7 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_7 <- combined %>%
  filter(Line_ID %in% "7") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 7 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_7,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_7,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-126, -123.5), ylim = c(43, 44.5), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 7.
line_7_to_keep <- combined %>%
  filter(
    Line_ID %in% "7",
    Station_ID %in% c("54", "55", "56", "57", "58")
  ) %>%
  distinct(Station_ID)

Which of Line 8 to keep

# Number of distinct depths sampled at each Line 8 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_8 <- combined %>%
  filter(Line_ID %in% "8") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 8 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_8,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_8,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-125.5, -123.5), ylim = c(41.5, 42.5), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 8.
line_8_to_keep <- combined %>%
  filter(
    Line_ID %in% "8",
    Station_ID %in% c("62", "63", "65", "66", "67")
  ) %>%
  distinct(Station_ID)

Which of Line 9 to keep

# Number of distinct depths sampled at each Line 9 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_9 <- combined %>%
  filter(Line_ID %in% "9") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 9 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_9,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_9,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-125.5, -123.5), ylim = c(39.5, 40.5), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 9.
line_9_to_keep <- combined %>%
  filter(
    Line_ID %in% "9",
    Station_ID %in% c("70", "71", "72", "73", "75")
  ) %>%
  distinct(Station_ID)

Which of Line 11 to keep

# Number of distinct depths sampled at each Line 11 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_11 <- combined %>%
  filter(Line_ID %in% "11") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 11 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_11,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_11,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-124, -122), ylim = c(37, 39), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 11.
line_11_to_keep <- combined %>%
  filter(
    Line_ID %in% "11",
    Station_ID %in% c("79", "81", "82", "83", "84")
  ) %>%
  distinct(Station_ID)

Which of Line 13 to keep

# Number of distinct depths sampled at each Line 13 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_13 <- combined %>%
  filter(Line_ID %in% "13") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 13 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_13,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_13,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-124, -121), ylim = c(35.8, 37), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 13.
line_13_to_keep <- combined %>%
  filter(
    Line_ID %in% "13",
    Station_ID %in% c("97", "98", "101", "103")
  ) %>%
  distinct(Station_ID)

Which of Line 14 to keep

# Number of distinct depths sampled at each Line 14 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_14 <- combined %>%
  filter(Line_ID %in% "14") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 14 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_14,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_14,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-124, -121), ylim = c(34, 36), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 14.
line_14_to_keep <- combined %>%
  filter(
    Line_ID %in% "14",
    Station_ID %in% c("107", "106", "105", "104")
  ) %>%
  distinct(Station_ID)

Which of Line 15 to keep

# Number of distinct depths sampled at each Line 15 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_15 <- combined %>%
  filter(Line_ID %in% "15") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 15 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_15,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_15,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-124, -120), ylim = c(32, 35), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 15.
line_15_to_keep <- combined %>%
  filter(
    Line_ID %in% "15",
    Station_ID %in% c("110", "111", "113", "114", "117")
  ) %>%
  distinct(Station_ID)

Which of Line 16 to keep

# Number of distinct depths sampled at each Line 16 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_16 <- combined %>%
  filter(Line_ID %in% "16") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 16 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_16,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_16,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-121, -118), ylim = c(33.5, 35), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 16.
line_16_to_keep <- combined %>%
  filter(
    Line_ID %in% "16",
    Station_ID %in% c("118", "120", "121", "122", "123")
  ) %>%
  distinct(Station_ID)

Which of Line 17 to keep

# Number of distinct depths sampled at each Line 17 station position.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() prints no grouping message.
summed_station_line_17 <- combined %>%
  filter(Line_ID %in% "17") %>%
  group_by(Longitude, Latitude, Line_ID, Station_ID) %>%
  dplyr::summarise(n_depths = n_distinct(`Depth (m)`), .groups = "drop")

# Map of the Line 17 stations, labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = summed_station_line_17,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = summed_station_line_17,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-121, -117), ylim = c(31, 34), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

# Stations retained on Line 17.
line_17_to_keep <- combined %>%
  filter(
    Line_ID %in% "17",
    Station_ID %in% c("124", "126", "127", "129", "131")
  ) %>%
  distinct(Station_ID)

Selected Stations

# Keep every sample whose Station_ID appears in any of the per-line keep
# lists. Matching is on Station_ID alone, not on the (Line_ID, Station_ID)
# pair.
combined_station_to_keep <- combined %>%
  filter(
    Station_ID %in% c(
      line_0_to_keep$Station_ID,
      line_2_to_keep$Station_ID,
      line_3_to_keep$Station_ID,
      line_4_to_keep$Station_ID,
      line_5_to_keep$Station_ID,
      line_6_to_keep$Station_ID,
      line_7_to_keep$Station_ID,
      line_8_to_keep$Station_ID,
      line_9_to_keep$Station_ID,
      line_11_to_keep$Station_ID,
      line_13_to_keep$Station_ID,
      line_14_to_keep$Station_ID,
      line_15_to_keep$Station_ID,
      line_16_to_keep$Station_ID,
      line_17_to_keep$Station_ID
    )
  )

Select Newport and Heceta Head lines for benthic samples

# Lines 6 and 7: for each retained station with samples deeper than 100 m and
# Longitude > -125, record the deepest sampled depth. `.groups = "drop"`
# returns an ungrouped tibble and silences the summarise() grouping message.
max_depths_6_7 <- combined_station_to_keep %>%
  filter(Line_ID %in% c("6", "7")) %>%
  filter(`Depth (m)` > 100) %>%
  filter(Longitude > -125) %>%
  group_by(Station_ID, Line_ID) %>%
  dplyr::summarise(max_depth = max(`Depth (m)`), .groups = "drop") %>%
  unite("station_line", Station_ID:Line_ID, sep = ":", remove = FALSE)

# Keep only the sample(s) taken at that deepest depth. Matching on
# station_line alone returns every depth sampled at these stations and leaves
# max_depth unused, so the depth is made part of the match key.
# semi_join() filters rows without adding columns, so the result keeps the
# same columns as before (including station_line).
max_depth_to_keep_6_7 <- combined_station_to_keep %>%
  filter(Line_ID %in% c("6", "7")) %>%
  unite(
    "station_line",
    c("Station_ID", "Line_ID"),
    sep = ":",
    remove = FALSE
  ) %>%
  semi_join(
    max_depths_6_7,
    by = c("station_line", "Depth (m)" = "max_depth")
  )

# Depth of every sample at the retained line 6 and 7 stations, faceted by
# station (rows) and line (columns).
combined_station_to_keep %>%
  filter(Line_ID %in% c("6", "7")) %>%
  ggplot(aes(x = Longitude, y = -`Depth (m)`)) +
  geom_point() +
  facet_grid(Station_ID ~ Line_ID)

Combine

# Flag each sample from the retained stations, then keep the flagged ones.
# A sample is kept if it is in the line 6/7 deep selection or was taken
# shallower than 100 m; anything else (including a missing depth) is dropped.
samples_to_keep <- combined_station_to_keep %>%
  mutate(
    to_keep = case_when(
      Sample_ID %in% max_depth_to_keep_6_7$Sample_ID ~ "keep",
      `Depth (m)` < 100 ~ "keep",
      TRUE ~ "drop"
    )
  ) %>%
  filter(to_keep == "keep")

General Stats

# Count the distinct values of three sample identifiers in the selection.
dplyr::summarise(
  samples_to_keep,
  n_distinct(`FINAL Sample NAME`),
  n_distinct(Sample_ID),
  n_distinct(`Sequential G3 Sample No.`)
)
## # A tibble: 1 × 3
##   n_distinct(\FINAL Sample NAME\…¹ n_distinct(Sample_ID…² n_distinct(\Sequenti…³
##                              <int>                  <int>                  <int>
## 1                              290                    281                    198
## # ℹ abbreviated names: ¹​`n_distinct(\`FINAL Sample NAME\`)`,
## #   ²​`n_distinct(Sample_ID)`, ³​`n_distinct(\`Sequential G3 Sample No.\`)`

Map of Selected Stations

# Map of all selected samples, coloured by line and labelled by Station_ID.
ggplot(data = world) +
  geom_sf() +
  geom_point(
    data = samples_to_keep,
    aes(x = Longitude, y = Latitude, colour = Line_ID)
  ) +
  geom_text(
    data = samples_to_keep,
    aes(x = Longitude, y = Latitude, label = Station_ID)
  ) +
  coord_sf(xlim = c(-127, -117), ylim = c(31, 50), expand = FALSE) +
  scale_color_manual(values = c(pal, pal2, pal3, pal4)) +
  xlab("Longitude") +
  ylab("Latitude") +
  theme_bw()

Depth Distribution of Selected Samples

O2 vs. Temp of Samples

# Oxygen against temperature for the selected samples, coloured by line;
# point size reflects the number of overlapping observations.
ggplot(
  samples_to_keep,
  aes(x = CTDTEMP_ITS90, y = CTDOXY, colour = `Line_ID`)
) +
  geom_count() +
  scale_color_manual(values = c(pal, pal2, pal3, pal4))

# O2, Temp, Depth

# Oxygen against temperature for the selected samples, coloured by log depth.
# pH_T_measured is coerced to numeric here but is not mapped in this plot.
samples_to_keep %>%
  mutate(pH_T_measured = as.numeric(pH_T_measured)) %>%
  ggplot(
    aes(x = CTDTEMP_ITS90, y = CTDOXY, colour = log(`Depth (m)`))
  ) +
  geom_count()

### Save Selected Samples

# Write the selected samples to a CSV at the path resolved by here().
samples_to_keep %>%
  write.csv(file = here("zacks_suggested_stations_20230418.csv"))